install.packages("devtools")library(devtools)install.packages("sdm")library(sdm)installAll()install.packages(c("raster", "rgdal", "sf", "rgeos", "ggplot2", "cowplot", "plotly", "mapview", "ggfortify", "scales", "factoextra ", "ggcorrplot", "prettyGraphs", "Rtsne", "DT", "parallel", "snow", "sdm", "FactoMineR", "rdist", "usdm", "dplyr", "tidyr", "purrr", "purrrlyr", "here", "fs", "stringr", "exactextractr", "gdalUtils", "dismo", "rgbif", "boot"))First we create a folder in our working directory to include input data.
# Create folder to store inputs.
if(!dir_exists('input_data')){
dir_create('input_data')
}
Now let’s download data from WorldClim 2.1. Note that when R downloads a file it has a timeout of 60 seconds. This may not be enough to download environmental data, so we can set options(timeout=n), where n is the number of seconds we need to download the data.
# This option allows us to control how much time do we need to download the data. If R takes more than 10 minutes (600 seconds) to download the data it will stop the download. Increase the timeout if needed.
options(timeout=600)
# Current data
# Files are automatically saved in input_data folder.
#WorldClim_data('current', variable = 'bioc', resolution = 10)
# Future data
gcms <- c("ac", "ae", "bc", "ca", "cc", "ce", "cn",
"ch", "cr", "ec", "ev", "fi", "gg", "gh", "hg",
"in", "ic", "ip", "me", "mi", "mp", "ml", "mr", "uk")
# WorldClim_data('future', variable = 'bioc', year = '2090', gcm = gcms, ssp = c('585'), resolution = 10)
# Downloading data from GBIF
# File is automatically saved in input_data folder
# spp_data <- GBIF_data('Colossoma macropomum')
spp_data <- here('input_data/spp_data.csv')
# Obtaining Natural Earth data:
#shape_study_area <- rnaturalearth::ne_download(scale = 50, type = "rivers_lake_centerlines", category = "physical")
Firstly, we name inputs and outputs, caring for using the correct extensions. a) Inputs:
# Shapefile (polygon or lines) delimiting study area.
shape_study_area <- here("input_data/shape_study_area/AmazonHydroRivers4.shp")
# Directory name containing current rasters to be rescaled.
folder_current_rasters <- here("input_data/WorldClim_data_current")
# Directory name containing future rasters to be rescaled.
folder_future_rasters <- here("input_data/WorldClim_data_future")
# Name of shapefile (.shp) for the study area to be saved.
output_shp_study_area <- here("output_data/grid/Colossoma_grid.shp")
# Name of R object (.rds) where the current rescaled variables will be saved.
output_shp_current <- here("output_data/WorldClim_data_current_rescaled/Colossoma_current.rds")
# Cell hight and width for the grid.
# This value depends on rasters projection. If rasters are in UTM, values will be in meters. If rasters are in decimal degrees (as in WorldClim 2.1), values will be in degrees. However, note that the function make_grid (used to build the grid above study area) has an argument called epsg where we can reproject spatial data. The epsg of study area is further transmitted to predictor variables. This means that even if WorldClim 2.1 is projected in decimal degrees we should address cell sizes in the desired epsg.
# Following values build a grid with 100 x 100 km.
cell_width = 100000
cell_height = 100000
# Note that setting those values to build a grid smaller than the input rasters may generate NaNs, causing some problems.
# If you have any variable in shape_study_area that you want to keep for rescaling, you can set here.
# Set the correct names of variables using as much as 10 characters.
# Setting the names to list() will use none of the variables in the shape, while setting it to NULL will use all variables.
names_var_shp_study_area <- list()
# As in the codeline above, here we set which variables in current rasters we want to keep for rescaling.
# Set the correct names of variables using as much as 10 characters.
# Setting the names to list() will use none of the variables in the shape, while setting it to NULL will use all variables.
current_var_names <- paste0('bio_', 1:19) # or NULL
# As in the codelines above, here we set which variables in future rasters we want to keep for rescaling.
# We will usually need at least the same variables as in the current scenario for projection.
# Set the correct names of variables using as much as 10 characters.
# Setting the names to list() will use none of the variables in the shape, while setting it to NULL will use all variables.
future_var_names <- current_var_names
The map of study area needs to be imported to R, so we can create a grid for the study area. This grid will be used for model building and projections.
shape_study_area %<>%
st_read() %>%
repair_shp()
## Reading layer `AmazonHydroRivers4' from data source
## `/Users/luizesser/Documents/Post-Doc/NAPI-EC/ForestFisher/Modelagens/input_data/shape_study_area/AmazonHydroRivers4.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 107294 features and 14 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -79.37708 ymin: -20.21042 xmax: -48.44375 ymax: 5.139583
## Geodetic CRS: WGS 84
if (output_shp_study_area %>% file_exists()== F){
grid_study_area <- shape_study_area %>%
make_grid(cell_width, cell_height, names_var_shp_study_area, epsg=6933)
output_shp_study_area %>%
path_dir() %>%
dir_create()
grid_study_area %>% st_write(
dsn = output_shp_study_area)
} else {
grid_study_area <- output_shp_study_area %>% st_read()
}
## Reading layer `Colossoma_grid' from data source
## `/Users/luizesser/Documents/Post-Doc/NAPI-EC/ForestFisher/Modelagens/output_data/grid/Colossoma_grid.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 642 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -7600000 ymin: -2500000 xmax: -4600000 ymax: 7e+05
## Projected CRS: WGS 84 / NSIDC EASE-Grid 2.0 Global
Comparing GCMs is important so we don’t need to project all the GCMs. We propose here an alternative method that calculates a k-means given the number of groups you want. First we need to rescale future variables. To make GCMs comparable, we use only one SSP and one time-period. It is only supported, so far, to use bio_1 and bio_12 (both are used to infer other bioclimatic variables).
### Setting inputs and outputs
scenarios_gcm <- 585
gcm_names <- gcms
## Exclude: MIROC_ESM, MIROC_ESM_CHEM, MIROC5 and CSIRO_MK3_6_0 (CMIP5, not CMIP6)
#gcms_result <- compare_gcms(folder_future_rasters, grid_study_area, var_names=c('bio_1','bio_12'), gcm_names, scenarios_gcm, k=5)
#print(gcms_result)
The next step aims to cross data from study area with rasters of variables. We will start with current data.
### Rescaling current data
if (output_shp_current %>% file_exists() == F) {
grid_current <- grid_study_area %>%
add_raster(folder_current_rasters, current_var_names)
output_shp_current %>%
path_dir() %>%
dir_create()
grid_current %>% saveRDS(output_shp_current)
} else {
grid_current <- output_shp_current %>% readRDS()
}
Now within a loop to rescale future variables.
# Downloading future data for selected gcms:
# WorldClim_data('future', variable = 'bioc', year = c('2030','2050','2070','2090'),
# gcm =gcms_result$suggested_gcms, ssp = c('245','585'), resolution = 10)
# Name of R objects (.rds) where the future rescaled variables will be saved.
ssps <- c('ssp245', 'ssp585') # Scenarios names
#gcms <- gcms_result$suggested_gcms
gcms <- c('ac', 'bc', 'ca', 'ev', 'mr')
scenarios <- apply(expand.grid(gcms, ssps, 10, c(2030,2050,2070,2090)), 1, paste, collapse="_")
output_shp_future <- here(paste0("output_data/WorldClim_data_future_rescaled/",
scenarios,
".rds"))
### Rescaling future data
if (!all(output_shp_future %>% file_exists())) {
for (i in 1:length(scenarios)) {
grid_future <- grid_study_area %>%
add_raster(folder_future_rasters, future_var_names, scenarios[i])
output_shp_future %>%
path_dir() %>%
dir_create()
grid_future %>% saveRDS(output_shp_future[grep(scenarios[i], output_shp_future)])
}
}
grid_future <- lapply(output_shp_future,function(x){readRDS(x)})
names(grid_future) <- scenarios
It is necessary to name the output. Be extra careful with extension names. a) Output:
# Set the path to the output shapefile, which will contain the presence/absence matrix.
spp_output <- here("output_data/spp_pa.shp")
It is also necessary to set some other important parameters.
# Species names to be used in the study.
# Names should be identical from input/spp_data.csv obtained previously.
# Setting this to NULL will use all species.
spp_names <- read.csv(spp_data)$species %>% table() %>% names()# or NULL
# Set which CRS projection is the grid_study_area.
crs_occ_data <- "+init=epsg:4326"
occ_shp <- spp_data %>%
occurrences_to_shapefile(spp_names, grid_study_area, CRS(crs_occ_data))
plot(occ_shp)
We will say to the grid_study_area which cells have an occurrence record for the studied species.
if (spp_output %>% file_exists() == F) {
grid_matrix_pa <- occ_shp %>%
occurrences_to_pa_shapefile(grid_study_area, spp_names)
spp_output %>%
path_dir() %>%
dir_create()
grid_matrix_pa %<>% select(-c('x_centroid', 'y_centroid', 'cell_id'))
grid_matrix_pa %>% st_write(dsn = spp_output)
} else {
grid_matrix_pa <- spp_output %>% st_read()
}
## Reading layer `spp_pa' from data source
## `/Users/luizesser/Documents/Post-Doc/NAPI-EC/ForestFisher/Modelagens/output_data/spp_pa.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 642 features and 33 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -7600000 ymin: -2500000 xmax: -4600000 ymax: 7e+05
## Projected CRS: WGS 84 / NSIDC EASE-Grid 2.0 Global
grid_matrix_pa %>%
mutate(sum = rowSums(across(where(is.numeric)))) %>%
mutate(across(sum, as.factor)) %>%
ggplot() +
geom_sf(aes(fill = sum), color=NA) +
scale_fill_brewer(palette = "YlGnBu")
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette YlGnBu is 9
## Returning the palette you asked for with that many colors
Check how many records there is to each species:
sapply(colnames(grid_matrix_pa)[-ncol(grid_matrix_pa)],
function(x){
sum(as.data.frame(grid_matrix_pa)[,x])},
USE.NAMES = T)
## astrnts_cl achnptrch_ brycn_mznc brycn_flct brycn_mlnp clphyss_mc chlcs_ryth
## 93 84 85 106 109 141 63
## clssm_mcrp hmds_mmclt hplrythrn_ lprns_gssz lprns_brnn lprns_fsct lprns_frdr
## 97 78 231 110 60 195 283
## lprns_klsw lthdrs_drs mglprns_tr mtynns_hyp mylpls_str mylpls_rbr mylpls_trq
## 29 20 71 82 111 153 60
## mylossm_rm oxydrs_ngr phrctcphl_ prcts_brch pmlds_blch pltynmtch_ ptrdrs_grn
## 97 108 89 84 227 55 94
## schzdn_fsc srrslms_gl srrslms_sp trprths_lb trprths_ng
## 164 53 117 208 192
Variable Selection must be done OR with a VIF routine, OR with a PCA. To perform a VIF routine, we will use only the environmental data from the occurrence data.
# Obtain occurrence data.frame and environmental variables:
df_vif <- get_df_vif(grid_current, grid_matrix_pa)
## var_names not informed. Variables detected: bio_1, bio_2, bio_3, bio_4, bio_5, bio_6, bio_7, bio_8, bio_9, bio_10, bio_11, bio_12, bio_13, bio_14, bio_15, bio_16, bio_17, bio_18, bio_19sp_names not informed. Detected species: astrnts_cl, achnptrch_, brycn_mznc, brycn_flct, brycn_mlnp, clphyss_mc, chlcs_ryth, clssm_mcrp, hmds_mmclt, hplrythrn_, lprns_gssz, lprns_brnn, lprns_fsct, lprns_frdr, lprns_klsw, lthdrs_drs, mglprns_tr, mtynns_hyp, mylpls_str, mylpls_rbr, mylpls_trq, mylossm_rm, oxydrs_ngr, phrctcphl_, prcts_brch, pmlds_blch, pltynmtch_, ptrdrs_grn, schzdn_fsc, srrslms_gl, srrslms_sp, trprths_lb, trprths_ng
# Run VIF routine from usdm package:
vif_bio <- lapply(df_vif, function(x){vifcor(x,th=0.5)})
vif_bio[[1]]
## 15 variables from the 19 input variables have collinearity problem:
##
## bio_17 bio_10 bio_16 bio_11 bio_1 bio_6 bio_15 bio_5 bio_12 bio_7 bio_9 bio_3 bio_14 bio_19 bio_2
##
## After excluding the collinear variables, the linear correlation coefficients ranges between:
## min correlation ( bio_8 ~ bio_4 ): 0.03229706
## max correlation ( bio_18 ~ bio_4 ): -0.2186792
##
## ---------- VIFs of the remained variables --------
## Variables VIF
## 1 bio_4 1.104933
## 2 bio_8 1.008966
## 3 bio_13 1.063787
## 4 bio_18 1.077752
# We can exclude variables with high VIF or go to the next step.
grid_current_vif <- sapply(names(df_vif), function(x){select(grid_current, !vif_bio[[x]]@excluded)}, USE.NAMES = T)
# Name of the shapefile in which the PCA values will be stored.
shp_pca <- here("output_data/pca/shp_pca.rds")
# Name of the shapefile in which current PCA projection will be stored.
shp_preds <- here("output_data/pca/shp_preds.rds")
# Name of the shapefile in which future PCA projection will be stored.
shp_preds_future <- here("output_data/pca/shp_preds_future.rds")
shp_matrix_pa <- spp_output %>%
st_read()
## Reading layer `spp_pa' from data source
## `/Users/luizesser/Documents/Post-Doc/NAPI-EC/ForestFisher/Modelagens/output_data/spp_pa.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 642 features and 33 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -7600000 ymin: -2500000 xmax: -4600000 ymax: 7e+05
## Projected CRS: WGS 84 / NSIDC EASE-Grid 2.0 Global
df_species <- shp_matrix_pa %>%
as.data.frame() %>%
select(-c('geometry'))
df_var_preditors <- output_shp_current %>%
get_predictors_as_df()
# Names of variables to be normalized (centered and scaled).
# Normalization can improve the modelling, the calculation of PCAs and the clusterization algorithms used to generate pseudoabsences.
var_normalization <- current_var_names # OR: paste0("bio_",1:19)
# Names of variables to be used in PCA.
var_pca_bio <- current_var_names # OR: paste0("bio_",1:19)
# Number of PCA-axes to be retained.
nr_comp_pca_bio <- 4
# Names of variables to be used as predictors when training the models. It can be environmental variables or PCA-axes.
var_predictors <-c("dim_1_bio","dim_2_bio","dim_3_bio","dim_4_bio")
df_potential_predictors <- df_species %>%
bind_cols(df_var_preditors)
df_potential_predictors <- df_potential_predictors %>%
center_scale(var_normalization)
df_potential_predictors %>%
head() %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
df_var_pca <- df_potential_predictors %>%
select(var_pca_bio[which(var_pca_bio %in% colnames(df_potential_predictors))])
df_var_pca %>%
head() %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
df_var_pca %>%
corr_plot()
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The table that follows shows the values of the axes and variables. A tabela a seguir mostra os valores dos eixos e variaveis.
if (shp_pca %>% file_exists() == T) {
file_delete(shp_pca)
}
pca_bio <- df_var_pca %>%
calc_pca(nr_comp_pca_bio, "bio")
pca_bio$var$loadings %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
if(!dir_exists('output_data/pca/')){
dir_create('output_data/pca/')
}
grid_study_area %>%
bind_cols(pca_bio$ind$coord %>% as.data.frame()) %>%
saveRDS(file = shp_pca)
Summary of dimensions. The following table shows the correlation between variables with axes (i.e. the significance from each variable to given axle).
pca_bio %>%
dt_pca_summ()
pca_bio %>%
contrib_scree()
pca_bio %>%
contrib_corr()
pca_bio %>%
contrib_dims()
PCA-axes considering the quality of variables’ representation.
pca_bio %>%
pca_cos2()
Quality of variables’ representation on axes.
pca_bio %>%
cos2_dims()
pca_bio %>%
cos2_corr()
pca_bio %>%
pca_bi_plot(df_species %>% rowMeans() %>% ceiling())
pca_bio %>%
comp_pca_retain()
## $broken.stick
## [1] 2
##
## $kaiser.mean
## [1] 4
##
## $horn.p
## [1] 4
df_potential_predictors <- df_potential_predictors %>%
bind_cols(pca_bio$ind$coord %>% as.data.frame())
df_potential_predictors %>%
head() %>%
round(4) %>%
datatable(options = list(pageLength = 10, scrollX=T))
if (shp_preds %>% file_exists() == F){
df_var_preditors <- df_potential_predictors %>%
select(var_predictors %>% all_of())
grid_study_area %>%
select(cell_id) %>%
bind_cols(df_var_preditors) %>%
saveRDS(file = shp_preds)
}
pca_future <- proj_pca(pca_bio, grid_future)
#pca_future
pca_future %>%
saveRDS(file = shp_preds_future)
As in previous steps, it is necessary to set inputs and outputs, taking extra care with extension names. a) Input:
# Let's use the PCA-axes built in the last step:
df_var_preditors <- df_potential_predictors[,colnames(df_potential_predictors) %in% var_predictors]
grid_future <- pca_future
# Name the directory to save trained models.
folder_models <- here("output_data/models")
# Algorithm names to be used in Species Distribution Modeling.
# Run getmethodNames() to unveil which algorithms are available.
algo <- c("svm", "rbf", "mda")
# Set the threshold criteria.
# 1:sp=se, 2:max(se+sp), 3:min(cost), 4:minROCdist, 5:max(kappa), 6:max(ppv+npv), 7:ppv=npv, 8:max(NMI), 9:max(ccr), 10: prevalence
thresh_criteria <- 2
# Number of runs to each algorithm
n_run <- 10
# Number of folds to crossvalidation
n_folds <- 4
# Number of pseudoabsence sets
n_pa <- 1
To build models, it is necessary to use pseudoabsences that contrast to presences. Currently, only the ‘random’ method is applied.
df_pseudoabsences <- shp_matrix_pa %>%
pseudoabsences(df_var_preditors, spp_names, method="random", folder_models)
It is possible to plot a t-SNE graph to check whether pseudoabsence data clusters into a separate group from presence data.
df_potential_predictors %>%
tsne_plot(df_pseudoabsences, spp_names[1])
## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
As we are using the sdm package, let’s start to build our models by indicating our input data.
d <- df_species %>%
fit_data(df_var_preditors, df_pseudoabsences)
#d
With the data, we can build our models.
df_species %>%
train_models_to_folder(
d,
algo,
n_run,
n_folds,
folder_models
)
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
##
## The variable importance for all the models are combined (averaged)...
folder_models %>%
dir_ls() %>%
path_file() %>%
head() %>%
as.data.frame()
"Number of trained species: " %>%
paste0(
folder_models %>%
dir_ls() %>%
length()
) %>%
print()
## [1] "Number of trained species: 33"
How many models failed?
d %>%
model_failures(folder_models)
## [1] 0
spp_names <- colnames(df_species)
thresholds_models <- spp_names %>%
sp_thresh_from_folder(folder_models, thresh_criteria)
thresholds_models_means <- spp_names %>%
sp_thresh_mean_from_folder(folder_models, thresholds_models)
thresholds_models_means
## $astrnts_cl
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 astrnts_cl mda 0.479
## 2 astrnts_cl rbf 0.431
## 3 astrnts_cl svm 0.458
##
## $achnptrch_
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 achnptrch_ mda 0.489
## 2 achnptrch_ rbf 0.320
## 3 achnptrch_ svm 0.476
##
## $brycn_mznc
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 brycn_mznc mda 0.486
## 2 brycn_mznc rbf 0.724
## 3 brycn_mznc svm 0.511
##
## $brycn_flct
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 brycn_flct mda 0.439
## 2 brycn_flct rbf 0.363
## 3 brycn_flct svm 0.440
##
## $brycn_mlnp
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 brycn_mlnp mda 0.512
## 2 brycn_mlnp rbf 0.530
## 3 brycn_mlnp svm 0.498
##
## $clphyss_mc
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 clphyss_mc mda 0.464
## 2 clphyss_mc rbf 0.452
## 3 clphyss_mc svm 0.448
##
## $chlcs_ryth
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 chlcs_ryth mda 0.539
## 2 chlcs_ryth rbf 0.733
## 3 chlcs_ryth svm 0.527
##
## $clssm_mcrp
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 clssm_mcrp mda 0.487
## 2 clssm_mcrp rbf 0.417
## 3 clssm_mcrp svm 0.515
##
## $hmds_mmclt
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 hmds_mmclt mda 0.458
## 2 hmds_mmclt rbf 0.220
## 3 hmds_mmclt svm 0.524
##
## $hplrythrn_
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 hplrythrn_ mda 0.443
## 2 hplrythrn_ rbf 0.410
## 3 hplrythrn_ svm 0.452
##
## $lprns_gssz
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 lprns_gssz mda 0.528
## 2 lprns_gssz rbf 0.512
## 3 lprns_gssz svm 0.451
##
## $lprns_brnn
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 lprns_brnn mda 0.498
## 2 lprns_brnn rbf 0.516
## 3 lprns_brnn svm 0.463
##
## $lprns_fsct
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 lprns_fsct mda 0.450
## 2 lprns_fsct rbf 0.462
## 3 lprns_fsct svm 0.480
##
## $lprns_frdr
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 lprns_frdr mda 0.444
## 2 lprns_frdr rbf 0.404
## 3 lprns_frdr svm 0.440
##
## $lprns_klsw
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 lprns_klsw mda 0.427
## 2 lprns_klsw rbf 0.449
## 3 lprns_klsw svm 0.470
##
## $lthdrs_drs
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 lthdrs_drs mda 0.507
## 2 lthdrs_drs rbf 1.95
## 3 lthdrs_drs svm 0.559
##
## $mglprns_tr
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 mglprns_tr mda 0.474
## 2 mglprns_tr rbf 0.373
## 3 mglprns_tr svm 0.472
##
## $mtynns_hyp
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 mtynns_hyp mda 0.449
## 2 mtynns_hyp rbf 0.355
## 3 mtynns_hyp svm 0.419
##
## $mylpls_str
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 mylpls_str mda 0.457
## 2 mylpls_str rbf 0.376
## 3 mylpls_str svm 0.465
##
## $mylpls_rbr
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 mylpls_rbr mda 0.447
## 2 mylpls_rbr rbf 0.444
## 3 mylpls_rbr svm 0.461
##
## $mylpls_trq
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 mylpls_trq mda 0.529
## 2 mylpls_trq rbf 0.374
## 3 mylpls_trq svm 0.531
##
## $mylossm_rm
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 mylossm_rm mda 0.482
## 2 mylossm_rm rbf 0.419
## 3 mylossm_rm svm 0.521
##
## $oxydrs_ngr
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 oxydrs_ngr mda 0.465
## 2 oxydrs_ngr rbf 0.429
## 3 oxydrs_ngr svm 0.514
##
## $phrctcphl_
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 phrctcphl_ mda 0.458
## 2 phrctcphl_ rbf 0.482
## 3 phrctcphl_ svm 0.516
##
## $prcts_brch
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 prcts_brch mda 0.488
## 2 prcts_brch rbf 0.442
## 3 prcts_brch svm 0.514
##
## $pmlds_blch
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 pmlds_blch mda 0.443
## 2 pmlds_blch rbf 0.472
## 3 pmlds_blch svm 0.528
##
## $pltynmtch_
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 pltynmtch_ mda 0.464
## 2 pltynmtch_ rbf 0.551
## 3 pltynmtch_ svm 0.415
##
## $ptrdrs_grn
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 ptrdrs_grn mda 0.532
## 2 ptrdrs_grn rbf 0.479
## 3 ptrdrs_grn svm 0.536
##
## $schzdn_fsc
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 schzdn_fsc mda 0.475
## 2 schzdn_fsc rbf 0.598
## 3 schzdn_fsc svm 0.491
##
## $srrslms_gl
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 srrslms_gl mda 0.493
## 2 srrslms_gl rbf 4.62
## 3 srrslms_gl svm 0.502
##
## $srrslms_sp
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 srrslms_sp mda 0.483
## 2 srrslms_sp rbf 0.592
## 3 srrslms_sp svm 0.512
##
## $trprths_lb
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 trprths_lb mda 0.421
## 2 trprths_lb rbf 0.422
## 3 trprths_lb svm 0.465
##
## $trprths_ng
## # A tibble: 3 × 3
## # Rowwise: species, method
## species method mean
## <chr> <chr> <dbl>
## 1 trprths_ng mda 0.453
## 2 trprths_ng rbf 0.432
## 3 trprths_ng svm 0.431
To project our models in space, we need to set where the models were saved (previously set as the folder_models object) and where we want to save our projections.
directory_projections <- here("output_data/projections")
Set up some variables.
df_pa <- shp_matrix_pa %>%
as.data.frame() %>%
select(-c('geometry'))
df_potential_predictors <- df_pa %>%
bind_cols(df_var_preditors)
projection_data <- lapply(grid_future, function(x){ x <- as.data.frame(x)
x[!(names(x) %in% c("x_centroid", "y_centroid", "geometry"))]})
projection_data$current <- df_var_preditors
And finally run our projections.
# Project models in scenarios
df_pa %>% predict_to_folder(
projection_data,
folder_models,
grid_study_area,
algo,
thresh_criteria,
directory_projections,
thresholds_models_means
)
consensus <- function(spp_names){lapply(spp_names, function(y){
sapply(dir_ls(glob = paste0("*", y, "*pa.csv"), recurse = T) %>% purrr::map(vroom, show_col_types = FALSE),
function(x){data.frame(c=x$consensus)},
simplify = T) %>%
as.data.frame() %>%
rowSums()}) %>%
as.data.frame() %>%
rowSums()}
consensus_vals <- consensus(spp_names)
p99 <- consensus_vals %>% quantile(probs=0.99)
p95 <- consensus_vals %>% quantile(probs=0.95)
ensemble_map(p99, grid_study_area)
## Warning in data.frame(...): row names were found from a short variable and have
## been discarded
ensemble_map(p95, grid_study_area)
## Warning in data.frame(...): row names were found from a short variable and have
## been discarded
ensemble_map(consensus_vals, grid_study_area)